[1] 38 [1] TRUE
#583 genes are removed from dataset
#continue only with genes that are not on chrX or chrY
no_sex <- gencode[ !seqnames(gencode) %in% c("chrX", "chrY") ]
#create vector of these genes
gene_id <- no_sex$gene_id
#continue with only these genes and remove genes on Chr X or Y #18414 genes
genes_counts_ordered2 = genes_counts_ordered[ (rownames(genes_counts_ordered) %in% gene_id), ]#remove ununstim samples in metadata
metadata_cultured <- metadata_filt[metadata_filt$Stimulation != "ununstim",]
#check numbers
dim(metadata_cultured)## [1] 454 38
#check numbers per stimulation
table(metadata_filt$Stimulation)##
## ATP DEX IFNy IL4 LPS R848 TNFa unstim
## 5 10 79 8 128 42 47 135
## ununstim
## 42
#remove samples in genes counts datafile
#genes_counts_cultured <- genes_counts_ordered2[,metadata_cultured$Sample]
sexByDonor = unique(metadata_cultured[,c("Donor_id", "sex")])
#createDT(sexByDonor)
as.data.frame(t(as.matrix(unclass( table(sexByDonor$sex, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))| f | m |
|---|---|
| 35 | 32 |
# The variable to be tested must be a fixed effect
names(metadata_cultured) = tolower(names(metadata_cultured))
form <- ~ age + (1|donor_id) + stimulation + picard_pct_ribosomal_bases + region + picard_pct_mrna_bases + sex + picard_pct_percent_duplication + picard_pct_pf_reads_aligned
# estimate weights using linear mixed model of dream
#vobjDream = voomWithDreamWeights( genes_counts_cultured, form, metadata_cultured )
# Fit the dream model on each gene
#fit = (dream( vobjDream, form, metadata_cultured ))
#res_age <- data.frame(topTable(fit, coef='sexm', number=nrow(genes_counts_cultured), sort.by = "p"), check.names = F)
#male sex as coefficient
#save(res_sex, file ="res_sex.Rdata")load("~/Downloads/res_sex.Rdata")
sign_sex <- subset(res_sex, adj.P.Val < 0.15)
sign_sexsign_sex <- subset(res_sex, adj.P.Val < 0.10)
sign_sexsign_sex <- subset(res_sex, adj.P.Val < 0.05)
sign_sexres = res_sex
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-10,10) + ylim(0, 10) + ylab("-log10 pvalue")load("~/Documents/MiGASti/docs/res_name_LPS2.Rdata")#note. pvalues based on wilcox test of log2(tpm)+1 without additional correction(not based on pvalue DEGs with DREAM)
#set rownames to Sample
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude]
#Excludes the samples from filters.
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)## [1] 38
gencode_30 <- read.delim("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
genes_tpm_ordered <- genes_tpm_filt[,rownames(metadata_filt)]
#head(genes_tpm_ordered)
all(rownames(metadata_filt) == colnames (genes_tpm_ordered)) #TRUE## [1] TRUE
#remove ununstim samples in metadata
metadata_cultured <- metadata_filt[metadata_filt$Stimulation != "ununstim",]
#check numbers
dim(metadata_cultured)## [1] 454 38
#check numbers per stimulation
table(metadata_filt$Stimulation)##
## ATP DEX IFNy IL4 LPS R848 TNFa unstim
## 5 10 79 8 128 42 47 135
## ununstim
## 42
#remove samples in genes counts datafile
genes_tpm_cultured <- genes_tpm_ordered[,metadata_cultured$Sample]
#select male and female samples in metadata cultured
samples_male = metadata_cultured$Sample[metadata_cultured$sex %in% "m"]
samples_female = metadata_cultured$Sample[metadata_cultured$sex %in% "f"]
metadata4deg = metadata_cultured[rownames(metadata_cultured) %in% c(samples_male,samples_female) ,]
genes_voom_male_female = as.data.frame(genes_tpm_cultured[, colnames(genes_tpm_cultured) %in% c(samples_male, samples_female)])
# dim(genes_voom_male_female)
# dim(metadata4deg)
#create top 6 genes from DEG
deg_lists = res_sex[order(res_sex$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)#create variable with ensembl id
top_6$ensembl = rownames(top_6)
#create dataframe
gene2check = as.data.frame(genes_voom_male_female[top_6$ensembl, ])
#merge dataframe with symbol (gene_id)
gene2check$ensembl = rownames(gene2check)
top_6 <- merge(gencode_30, top_6, by = "ensembl")
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
#create dataframe for plots
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))## Warning in melt(gene2check, id.vars = c("ensembl", "symbol")): The melt generic
## in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
## namespace like reshape2::melt(gene2check). In the next version, this warning
## will become an error.
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
# show direction of effect for male versus female
ggplot(gene2check_charac, aes(x = sex, y = value, fill = sex)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
res_sex$ensembl = rownames(res_sex)
res_sex_diff <- merge(res_sex, gencode_30, by = "ensembl")
res_sex_diff_top = res_sex_diff[, c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
createDT(res_sex_diff_top)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html